# clear environment
rm(list=ls())

# laod packages
library(ggplot2)

# load data set with keyword counts and crime numbers
dat = read.csv("police_metadata.csv")

# create logged ratio on issue level
dat$kw_ratio_log <- log(dat$count_right_kw + 0.5) - log(dat$count_left_kw + 0.5)

# create crime ratio
dat$extreme_crime_ratio_log <- log(dat$extreme_crime_right + 0.5) - log(dat$extreme_crime_left + 0.5)

# impute missing values in crime ratio with federal level crime ratio
bund_dat <- dat[dat$jurisdiction=="bund", c("year", "extreme_crime_right", "extreme_crime_left")]
bund_dat$extreme_crime_ratio_log_fed <- log(bund_dat$extreme_crime_right + 0.5) - log(bund_dat$extreme_crime_left + 0.5)
bund_dat <- bund_dat[,c("year", "extreme_crime_ratio_log_fed")]
bund_dat = bund_dat[!duplicated(bund_dat$year),]

dat <- merge(dat, bund_dat, by="year", all.x=T)

dat$extreme_crime_ratio_log_imp <- ifelse(is.na(dat$extreme_crime_ratio_log), dat$extreme_crime_ratio_log_fed, dat$extreme_crime_ratio_log)

# create bias measures
dat$bias_ratio_extreme_imp <- dat$kw_ratio_log - dat$extreme_crime_ratio_log_imp

# change reference level for police unions
dat$union <- factor(dat$union, levels=c("gdp", "dpolg"))

## regression models (NOTE: we use section fixed effects, which are almost the same as jurisdictions. For simplicity, in the paper we refer to them as "jurisdiction fixed effects")
# position
mod_1 <- lm(kw_ratio_log~factor(union), data=dat)
mod_2 <- lm(kw_ratio_log~factor(union)+factor(section), data=dat)
mod_3 <- lm(kw_ratio_log~factor(union)+factor(section)+factor(year), data=dat)
# bias
mod_4 <- lm(bias_ratio_extreme_imp~factor(union), data=dat)
mod_5 <- lm(bias_ratio_extreme_imp~factor(union)+factor(section), data=dat)
mod_6 <- lm(bias_ratio_extreme_imp~factor(union)+factor(section)+factor(year), data=dat)

stargazer::stargazer(mod_1, mod_2, mod_3, mod_4, mod_5, mod_6,
                     digits=2, star.cutoffs = c(0.05, 0.01, 0.001))


## coefficient plot

ests_df = rbind.data.frame(summary(mod_1)$coefficients[2,1:2],
                           summary(mod_3)$coefficients[2,1:2],
                           summary(mod_4)$coefficients[2,1:2],
                           summary(mod_6)$coefficients[2,1:2])
names(ests_df) = c("coef", "se")
ests_df$outcome = c("Position", "Position", "Bias", "Bias")
ests_df$outcome = factor(ests_df$outcome, levels=c("Position", "Bias"))
ests_df$mod = c("w/o Fixed Effects", "w/ Year and State Fixed Effects", "w/o Fixed Effects", "w/ Year and State Fixed Effects")
ests_df$mod = factor(ests_df$mod, levels = c("w/o Fixed Effects", "w/ Year and State Fixed Effects"))

ggplot(data=ests_df, aes(x=outcome, y=coef, group=factor(mod), shape=factor(mod))) + 
  geom_errorbar(aes(ymax=coef+1.96*se, ymin=coef-1.96*se), width=0, position = position_dodge(width = 0.5)) +
  geom_point(position=position_dodge(width=.5), size=2.5) + 
  theme_classic() +
  #  theme_bw() +
  theme(panel.grid = element_blank()) +
  geom_hline(yintercept = 0, linetype="dashed") +
  theme(legend.position = "bottom", text = element_text(size=18)) +
  xlab("Outcome Variable") + ylab("Coefficient of DPolG") +
  scale_shape_manual(values=c(16, 17), name="")

ggsave("Fig4_6.pdf", width = 7, height=5)


